'''
Created on Mar 15, 2013

@author: arenduchintala
'''
# Maps "TRUE_TAG->PREDICTED_TAG" to the list of first fields (presumably the
# words) of the predicted lines on which that confusion occurred.
errors = {}


def read_lines(path):
    """Return all lines of the text file at *path*, closing it afterwards."""
    with open(path, 'r') as handle:
        return handle.readlines()


def tally_errors(preds, trues, confusions):
    """Compare predicted lines against key lines and record the mismatches.

    Lines are paired positionally with ``zip``, so any surplus lines in the
    longer sequence are ignored.  Two lines count as equal when they match
    after stripping surrounding whitespace; this keeps a missing trailing
    newline from being reported as a tagging error.

    Each mismatching pair is printed, and the first field of the predicted
    line is appended to ``confusions["<true tag>-><predicted tag>"]``, where
    the tag is the second single-space-separated field of the line.

    Args:
        preds: sequence of predicted lines.
        trues: sequence of reference (key) lines.
        confusions: dict updated in place with the mismatches.

    Returns:
        A ``(correct, total)`` tuple of line counts.

    Raises:
        IndexError: if a mismatching line has no second field.
    """
    correct = 0
    total = 0
    for ps, ts in zip(preds, trues):
        total += 1
        pred_line = ps.strip()
        true_line = ts.strip()
        if pred_line == true_line:
            correct += 1
            continue
        # Single-argument print() emits the same text on Python 2 and 3.
        print("%s %s" % (ps.rstrip(), ts.rstrip()))
        pred_fields = pred_line.split(" ")
        true_fields = true_line.split(" ")
        k = true_fields[1] + "->" + pred_fields[1]
        confusions.setdefault(k, []).append(pred_fields[0])
    return correct, total


def accuracy(correct, total):
    """Return ``correct / total`` as a float, or 0.0 when nothing was compared."""
    if total == 0:
        # Empty input: avoid ZeroDivisionError.
        return 0.0
    return float(correct) / total


if __name__ == "__main__":
    preds = read_lines('../conll/pos.out')
    trues = read_lines('../conll/pos.key')
    correct, total = tally_errors(preds, trues, errors)

    # Per-confusion summary: key, number of occurrences, offending entries.
    for k in errors:
        print("%s %s %s" % (k, len(errors[k]), errors[k]))
    print(accuracy(correct, total))
        
    